from pathlib import Path
import logging
import os
import functools
import re
import pprint as pp
import datetime
import yaml
import h5py
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib as mpl
RC_PARAMS = dict(mpl.rcParams)
from matplotlib import pyplot as plt
import fitgrid
import fitgrid.utils as fgutil
from udck19_filenames import (
EEG_EXPT_FILES,
EEG_EPOCHS_DIR,
EEG_MODELING_DIR
)
from udck19_utils import (
get_udck19_logger,
check_ENV,
EEG_EXPT_SPECS,
EEG_26_STREAMS,
RHS_VARS,
LMER_MODELS,
LMER_MODELS_BY_EXPT,
standardize,
read_fg_summaries_hdf,
formula_to_name,
plotchans,
MPL_32_CHAN,
MPL_MIDLINE,
udck19_figsave,
panel_from_idx,
FIG_TAG_SPECS,
)
# enforce active conda environment
check_ENV()
# logging config
__file__ = 'udck19_pipeline_3.ipynb'
logging.shutdown()
LOGGER = get_udck19_logger(__file__)
pipeline_start = datetime.datetime.now()
LOGGER.info(f"""
udck19 Supplementary Materials 3
CONDA_DEFAULT_ENV: {os.environ['CONDA_DEFAULT_ENV']}
pandas: {pd.__version__}
fitgrid: {fitgrid.__version__}
Start {pipeline_start.strftime("%d.%b %Y %H:%M:%S")}
""")
FIG_PREFIX = 'udck19_pipeline_3 Fig'
FIG_COUNT = 1
PRERUN = False # True
if PRERUN:
step = 5
chans = 4
prerun_pfx = f"step{step}_chans{chans}_"
modl_path = EEG_MODELING_DIR / "prerun"
else:
prerun_pfx = ""
modl_path = EEG_MODELING_DIR
# expt as a random effect
LMER_ACZ_RANEF_F = modl_path / f"{prerun_pfx}lmer_acz_ranef.h5"
# expt as a fixed effect
LMER_ACZ_X_EXPT_F = modl_path / f"{prerun_pfx}lmer_acz_x_expt_ranef.h5"
# for experiments separately
BY_EXPT_PFX = f"{prerun_pfx}lmer_acz_comp_"
assert LMER_ACZ_RANEF_F.exists()
assert LMER_ACZ_X_EXPT_F.exists()
# for separate experiment plots
mpl.rcParams['figure.max_open_warning'] = 30
style='seaborn-bright'
# set plot pchan params per beta
beta_kws = {
"(Intercept)": {
'margins': {'bottom': 0.15},
'axes': {"ylim": (-4, 4)}, # these scale y-extent of the waveforms
'cal': {
'ylabel': "$\mu V$",
'yticks': (-2, 2),
},
'chan_label': 'north',
},
"article_cloze_z": {
'margins': {'bottom': 0.15},
'axes': {"ylim": (-.25, 0.75)},
'cal': {
'ylabel': "$\mu V / SD_{article\_cloze}$",
'yticks': (0, .5),
},
'chan_label': 'north'
},
}
if PRERUN:
layout = MPL_MIDLINE
for beta, kws in beta_kws.items():
kws.update({'chan_label': 'east'})
else:
layout = MPL_32_CHAN
# highlight ... alpha=0 to disable
n4_highlight = {
'xmin': 300,
'xmax': 500,
'color': 'magenta',
'alpha': 0.2
}
(Intercept), always modeled, i.e., all formulae have an implicit 1 + ...
article cloze, centered and scaled, modeled as the fixed effect of interest
subject, item, and experiment modeled as random effects with random intercepts and slopes
The maximal model, at time $t$,
EEG(t) =
1 +
article_cloze_z
+ (article_cloze_z | expt)
+ (article_cloze_z | sub_id)
+ (article_cloze_z | article_item_id)
lme4::lmer via pymer4¶Akiake Information Criterion (AIC) goodness of fit is assessed at all channels and timepoints via $\Delta_{i} = AIC_{i} - AIC_{min}$ for all models $i$ in the set. By definition $\Delta_{min} = 0$, other $\Delta_{i}$ are positive. Relative to the model with the lowest AIC,
"Some simple rules of thumb are often useful in assessing the relative merits of models in the set: Models having $\Delta_{i} < 2$ have substantial support (evidence), those in which $4 \leq \Delta_{i} \leq 7$ have considerably less support, and models having $\Delta_{i} \gt 10 $ have essentially no support." Burnham and Anderson, 2002, p. 271.
Selection heuristics:
$\Delta_{i}$ < 2: no empirical grounds for choosing between the alternatives based on the data and specified model structures (although other considerations, e.g., parsimony, may be relevant).
$\Delta_{i}$ > 4: reasonable grounds for selecting the model with $AIC_{min}$ based on the data and specified model structures.
Vary random effects structure with fixed-effects of intercept and article_cloze held constant
Begin with the maximal random effects structure, drop higher order terms first: slopes then intercepts.
Keep it maximal (KIM): stop dropping terms as soon the fitting algorithm consistently converges.
Keep it parsimonious (KIP): continue dropping terms until the simplified model has "considerably less support".
LOGGER.info(f"""
Loading fitted models: {LMER_ACZ_RANEF_F}
""")
model_set = "lmer_acz_ranef"
LOGGER.info(
"model set: " + model_set + "\n" + "\n".join(LMER_MODELS[model_set])
)
ranefs = [fmla for fmla in LMER_MODELS[model_set] if fmla.split()[0] == 'article_cloze_z']
assert len(ranefs) == 11
LOGGER.info("Random effects model set\n" + "\n".join(ranefs))
LOGGER.info(f"LMER_ACZ_RANEF_F: {LMER_ACZ_RANEF_F}")
ranef_summaries_df = read_fg_summaries_hdf(LMER_ACZ_RANEF_F, ranefs)
plt.close('all')
f, axs = fgutil.summary.plot_AICmin_deltas(ranef_summaries_df)
f.set_size_inches(12, 48)
f.subplots_adjust(hspace=.3)
fig_tag = f"{FIG_PREFIX} {FIG_COUNT} acz ranef AIC"
f.suptitle(fig_tag, **FIG_TAG_SPECS)
for row in range(axs.shape[0]):
ax = axs[row, 0]
ax.set(ylim=(0,50))
# panel label
ax.text(
x=-0.1,
y=1.0,
s=f"{panel_from_idx(row)})",
horizontalalignment='right',
fontsize='x-large',
fontweight='bold',
transform=ax.transAxes
)
for axj in [0,1]:
axs[row, axj].axvspan(**n4_highlight)
# move channel legend to the bottom
if ax.get_legend():
ax.get_legend().remove()
if row == axs.shape[0] - 1:
ax.legend(loc='upper left', bbox_to_anchor=(0, -0.1), ncol=4)
FIG_COUNT = udck19_figsave(f, fig_tag, FIG_COUNT)
All candidates with random slopes for items have substantial issues with convergence, as do models with random slopes for experiment albeit to a lesser extent.
The models with the most complex random effects structure that consistently converges is
article_cloze_z + (1 | expt) + (article_cloze_z | sub_id) + (1 | article_item_id)
Stop.
Dropping the subject slope does not substantially change the fit, so the data do not justify the added complexity.
article_cloze_z + (1 | expt) + (1 | sub_id) + (1 | article_item_id)
Dropping any one of the three random intercept terms makes the fit substantially worse.
Stop
For these data the two stopping rules converge on the form of model, differing only on whether or not to retain the subject slope:
KIM: article_cloze_z + (1 | expt) + (article_cloze_z | sub_id) + (1 | article_item_id)
KIP: article_cloze_z + (1 | expt) + (1 | sub_id) + (1 | article_item_id)
Hold random effects structure constant, compare model pairs: full model with vs. reduced model without the fixed effect of article cloze
$\Delta_{i} < 2$ no empirical grounds for choosing between alternatives.
$\Delta_{i}> 4$ has substantially less empirical support.
lmer_acz_comps = {
# MAX included to illustrate fitting warnings
"MAX": (
'article_cloze_z + (article_cloze_z | expt) + (article_cloze_z | sub_id) + (1 | article_item_id)',
'(article_cloze_z | expt) + (article_cloze_z | sub_id) + (1 | article_item_id)',
),
"KIM": (
'article_cloze_z + (1 | expt) + (article_cloze_z | sub_id) + (1 | article_item_id)',
'(1 | expt) + (article_cloze_z | sub_id) + (1 | article_item_id)',
),
"KIP": (
'article_cloze_z + (1 | expt) + (1 | sub_id) + (1 | article_item_id)',
'(1 | expt) + (1 | sub_id) + (1 | article_item_id)',
)
}
for x0, x1 in [(-1500, 1500), (-200, 600)]:
for proc, comp in lmer_acz_comps.items():
f, axs = fgutil.summary.plot_AICmin_deltas(
read_fg_summaries_hdf(LMER_ACZ_RANEF_F, comp).query("Time >= @x0 and Time <= @x1")
)
f.set_size_inches(16, 10)
fig_tag = f"{FIG_PREFIX} {FIG_COUNT} {proc} AIC {x0} {x1}"
f.suptitle(
fig_tag,
x=0,
y=1,
fontsize='x-large',
fontweight='bold'
)
n_rows, n_cols = axs.shape
for row in range(n_rows):
ax = axs[row, 0]
# panel label
ax.text(
x=-0.1,
y=1.0,
s=f"{panel_from_idx(row)})",
horizontalalignment='right',
fontsize='x-large',
fontweight='bold',
transform=ax.transAxes
)
for axj in [0,1]:
axs[row, axj].axvspan(**n4_highlight)
ax.set(ylim=(0,25))
if ax.get_legend():
ax.get_legend().remove()
if row == n_rows - 1:
ax.legend(loc='upper left', bbox_to_anchor=(0, -0.1), ncol=4)
FIG_COUNT = udck19_figsave(f, fig_tag, FIG_COUNT)
plt.close('all')
# lookup KIM and KIP full models
full_models = [
(proc, model)
for proc, models in lmer_acz_comps.items()
for model in models
if proc in ["KIM", "KIP"] and re.match(r"^article_cloze_z", model)
]
intervals = [(-1500, 1500), (-200, 600)]
for x0, x1 in intervals:
# slurp the model summaries
rerps = read_fg_summaries_hdf(
LMER_ACZ_RANEF_F, [full_model for _, full_model in full_models]
).query('Time >= @x0 and Time <= @x1')
# ---------------------------------
# Plot KIM and KIP rERPs separately
# ---------------------------------
for (proc, full_model) in full_models:
# slice out KIM or KIP
proc_rerps = rerps.query("model == @full_model")
plots = plotchans(proc_rerps, beta_kws, style=style, layout=layout, se_ci="CI")
for plot in plots:
beta = plot['beta']
f = plot['fig']
for ax in f.get_axes():
ax.axvspan(**n4_highlight)
suptitle_txt = f._suptitle.get_text()
x, y = f._suptitle.get_position()
f.suptitle(
f"{FIG_PREFIX} {FIG_COUNT}\n{proc} {suptitle_txt}",
x=x,
y=y,
fontsize='x-large',
fontweight='bold'
)
FIG_COUNT = udck19_figsave(
f,
f"{FIG_PREFIX}_{FIG_COUNT}_{proc}_{beta}_{x0}_{x1}",
FIG_COUNT
)
# ----------------------------------------------
# overplot the KIM and KIP rERPs for comparison
# ----------------------------------------------
rerps.index = rerps.index.remove_unused_levels()
plots = plotchans(rerps, beta_kws, style=style, layout=layout, se_ci="CI")
for plot in plots:
beta = plot['beta']
f = plot['fig']
for ax in f.get_axes():
ax.axvspan(**n4_highlight)
suptitle_txt = f._suptitle.get_text()
x, y = f._suptitle.get_position()
f.suptitle(
f"{FIG_PREFIX} {FIG_COUNT}\nKIM vs. KIP {suptitle_txt}",
x=x,
y=y,
fontsize='x-large',
fontweight='bold'
)
FIG_COUNT = udck19_figsave(
f,
f"{FIG_PREFIX}_{FIG_COUNT}_KIM_KIP_{beta}_{x0}_{x1}",
FIG_COUNT
)
expt as a fixed effect AIC $\Delta_{i}$ and rERPs $\hat{\beta_{j}}$¶lmer_acz_x_expt_comps = {
'KIM': (
'article_cloze_z + expt + (article_cloze_z | sub_id) + (1 | article_item_id)',
'expt + (article_cloze_z | sub_id) + (1 | article_item_id)',
),
'KIP': (
'article_cloze_z + expt + (1 | sub_id) + (1 | article_item_id)',
'expt + (1 | sub_id) + (1 | article_item_id)'
)
}
LOGGER.info(f"""
Model comparisons with experiment as a fixed effect
{LMER_ACZ_X_EXPT_F}
{pp.pformat(lmer_acz_x_expt_comps)}
""")
for x0, x1 in [(-1500, 1500), (-200, 600)]:
for proc, comp in lmer_acz_x_expt_comps.items():
f, axs = fgutil.summary.plot_AICmin_deltas(
read_fg_summaries_hdf(LMER_ACZ_X_EXPT_F, comp).query("Time >= @x0 and Time <= @x1")
)
f.set_size_inches(12, 8)
fig_tag = f"{FIG_PREFIX} {FIG_COUNT} {proc} AIC {x0} {x1}"
f.suptitle(fig_tag, **FIG_TAG_SPECS)
n_rows, n_cols = axs.shape
for row in range(n_rows):
ax = axs[row, 0]
# panel label
ax.text(
x=-0.1,
y=1.0,
s=f"{panel_from_idx(row)})",
horizontalalignment='right',
fontsize='x-large',
fontweight='bold',
transform=ax.transAxes
)
for axj in [0,1]:
axs[row, axj].axvspan(**n4_highlight)
ax.set(ylim=(0,25))
if ax.get_legend():
ax.get_legend().remove()
if row == n_rows - 1:
ax.legend(loc='upper left', bbox_to_anchor=(0, -0.1), ncol=4)
FIG_COUNT = udck19_figsave(f, fig_tag, FIG_COUNT)
lmer_acz_x_expt_comps_full_models = [
(proc, model)
for proc, models in lmer_acz_x_expt_comps.items()
for model in models
if re.match(r"^article_cloze_z", model)
]
intervals = [(-1500, 1500), (-200, 600)]
for x0, x1 in intervals:
# slurp the model summaries
rerps_acz_x_expt = read_fg_summaries_hdf(
LMER_ACZ_X_EXPT_F, [model for _, model in lmer_acz_x_expt_comps_full_models]
).query('Time >= @x0 and Time <= @x1 and beta in ["(Intercept)", "article_cloze_z"]')
# ---------------------------------
# Plot KIM and KIP rERPs separately
# ---------------------------------
for (proc, full_model) in lmer_acz_x_expt_comps_full_models:
# slice out KIM or KIP
proc_rerps = rerps_acz_x_expt.query("model == @full_model")
plots = plotchans(proc_rerps, beta_kws, style=style, layout=layout, se_ci="CI")
for plot in plots:
beta = plot['beta']
f = plot['fig']
for ax in f.get_axes():
ax.axvspan(**n4_highlight)
suptitle_txt = f._suptitle.get_text()
x, y = f._suptitle.get_position()
f.suptitle(
f"{FIG_PREFIX} {FIG_COUNT}\n{proc} {suptitle_txt}",
x=x,
y=y,
fontsize='x-large',
fontweight='bold'
)
FIG_COUNT = udck19_figsave(
f,
f"{FIG_PREFIX}_{FIG_COUNT}_{proc}_{beta}_{x0}_{x1}",
FIG_COUNT
)
lmer_acz_comps_x_expt = {
'KIM': [
'article_cloze_z + (article_cloze_z | sub_id) + (1 | article_item_id)',
'(article_cloze_z | sub_id) + (1 | article_item_id)',
],
'KIP': [
'article_cloze_z + (1 | sub_id) + (1 | article_item_id)',
'(1 | sub_id) + (1 | article_item_id)',
]
}
LOGGER.info(f"""
Model comparisons for each data set separately
{pp.pformat(lmer_acz_comps_x_expt)}
""")
for x0, x1 in [(-1500, 1500), (-200, 600)]:
for expt in EEG_EXPT_SPECS.keys():
lmer_acz_ranef_expt_f = modl_path / f"{BY_EXPT_PFX}{expt}.h5"
assert (lmer_acz_ranef_expt_f).exists()
for proc, models in lmer_acz_comps_x_expt.items():
summary = read_fg_summaries_hdf(lmer_acz_ranef_expt_f, models)
# slice time interval and model pairs
qstr = "Time >= @x0 and Time <= @x1 and model in @models"
# print(expt, proc, models)
f, axs = fgutil.summary.plot_AICmin_deltas(summary.query(qstr))
f.set_size_inches(16, 10)
fig_tag = f"{FIG_PREFIX} {FIG_COUNT} {expt} {proc} AIC {x0} {x1}"
f.suptitle(fig_tag, **FIG_TAG_SPECS)
n_rows, n_cols = axs.shape
for row in range(n_rows):
ax = axs[row, 0]
ax.set(yticks=[0, 2, 4, 7, 10, 15, 20])
# panel label
ax.text(
x=-0.1,
y=1.0,
s=f"{panel_from_idx(row)})",
horizontalalignment='right',
fontsize='x-large',
fontweight='bold',
transform=ax.transAxes
)
for axj in [0,1]:
axs[row, axj].axvspan(**n4_highlight)
ax.set(ylim=(0,25))
if ax.get_legend():
ax.get_legend().remove()
if row == n_rows - 1:
ax.legend(
loc='upper left',
bbox_to_anchor=(0, -0.1),
ncol=4
)
FIG_COUNT = udck19_figsave(f, fig_tag, FIG_COUNT)
plt.close('all')
full_models_expt = [
(proc, model)
for proc, models in lmer_acz_comps_x_expt.items()
for model in models
if proc in ["KIM", "KIP"] and re.match(r"^article_cloze_z", model)
]
# as above, for each experiment
for expt in EEG_EXPT_SPECS.keys():
# set the summary data file this experiment
lmer_acz_ranef_expt_f = modl_path / f"{BY_EXPT_PFX}{expt}.h5"
assert (lmer_acz_ranef_expt_f).exists()
# for each interval
for x0, x1 in [(-1500, 1500), (-200, 1200)]:
# lookup the KIM, KIP summaries
for proc, fmodel in full_models_expt:
# fetch experiment model summary data
rerps = read_fg_summaries_hdf(
lmer_acz_ranef_expt_f, [fmodel]
).query('Time >= @x0 and Time <= @x1')
plots = plotchans(rerps, beta_kws, style=style, layout=layout, se_ci="CI")
for plot in plots:
beta = plot['beta']
f = plot['fig']
suptitle_txt = f._suptitle.get_text()
x, y = f._suptitle.get_position()
suptitle_txt = f"{FIG_PREFIX} {FIG_COUNT}\n{expt} {proc} {suptitle_txt}"
f.suptitle(
suptitle_txt,
x=x,
y=y,
fontsize='x-large',
fontweight='bold'
)
for ax in f.get_axes():
ax.axvspan(**n4_highlight)
FIG_COUNT = udck19_figsave(
f,
f"{FIG_PREFIX}_{FIG_COUNT}_{expt}_{proc}_{beta}_{x0}_{x1}",
FIG_COUNT
)
# log execution time
pipeline_stop = datetime.datetime.now()
elapsed = pipeline_stop - pipeline_start
LOGGER.info(f"""
Done {pipeline_stop.strftime("%d.%b %Y %H:%M:%S")}
Elapsed time: {elapsed}
""")